* Reset settings and initialize log file
* NOTE(review): launch is not defined in this file; presumably a project-level
* command that pairs with the unlaunch call at the end of the script. The path()
* argument is passed through unchanged -- confirm its meaning against the
* command's own definition.
launch, path("share/sass")

*-------------------------------------------------------------------------------
* Price and Wasserman (2024), "The Summer Drop in Female Employment"
*
* Description: Analyze gender differences in summer earnings among teachers.
*-------------------------------------------------------------------------------


* Prepare SASS data
*-------------------------------------------------------------------------------

* Read the derived SASS teacher file, replacing whatever is in memory
use "$basepath/data/derived/sass_teacher.dta", clear

* Full-time teachers only: discard every record whose main assignment code is
* not 1 (records with a missing code are discarded as well)
drop if main_assign != 1

* 0/1 indicators that pool related kinds of extra work. Each one is switched on
* when at least one of the listed component flags equals 1.
*   summer_sch : school-based summer work (teaching or non-teaching)
*   reg_sch    : school-based extra work during the regular year
*   sch_any    : any extra work during the regular year, school-based or other
generate summer_sch = inlist(1, summer_teach, summer_nonteach_sch)
generate reg_sch    = inlist(1, reg_sch_oth, reg_sch_bonus)
generate sch_any    = inlist(1, reg_sch_oth, reg_sch_bonus, reg_other)

* Convert banded earnings codes into dollar amounts
*
* Every earnings item is reported as a category code 1, 2, ..., K, K+1:
*   - codes 1..K are closed bins; bin k runs from (upper bound of bin k-1) + 1
*     up to its own upper bound, with the first bin starting at 1, and is
*     assigned the midpoint of that range
*   - code K+1 is the open-ended (topcoded) bin; it is assigned its lower bound
*     multiplied by the inflation factor below
*   - a system-missing code is assigned 0
*   - any other code leaves the dollar amount missing

* Inflation factor for topcoded earnings
local topcode_mult = 1.5

* For each dollar variable: the category variable it is built from (src_*) and
* the upper bounds of its closed bins, in code order (caps_*)
local src_reg_salary_num         reg_salary
local caps_reg_salary_num        25000 30000 35000 45000

local src_reg_sch_oth_num        reg_sch_oth_amt
local caps_reg_sch_oth_num       599 1499 2999

local src_reg_sch_bonus_num      reg_sch_bonus_amt
local caps_reg_sch_bonus_num     499 999 1999

local src_reg_other_num          reg_other_amt
local caps_reg_other_num         999 2999 5999

local src_summer_teach_num       summer_teach_amt
local caps_summer_teach_num      999 1999 3999

local src_summer_nonteach_num    summer_nonteach_sch_amt
local caps_summer_nonteach_num   999 1999 3999

local src_summer_nonschool_num   summer_nonschool_amt
local caps_summer_nonschool_num  999 1999 3999

foreach money in reg_salary_num reg_sch_oth_num reg_sch_bonus_num reg_other_num summer_teach_num summer_nonteach_num summer_nonschool_num {
	local code `src_`money''
	gen `money' = .

	* Closed bins: midpoint of [lower, upper]
	local lower = 1
	local bin = 0
	foreach upper of local caps_`money' {
		local ++bin
		replace `money' = (`upper' + `lower')/2 if `code' == `bin'
		local lower = `upper' + 1
	}

	* Open-ended top bin: inflate its lower bound
	local ++bin
	replace `money' = (`lower') * `topcode_mult' if `code' == `bin'

	* No category reported: treat as zero earnings from this source
	replace `money' = 0 if `code' == .
}

* Supplemental earnings totals
*   *_sch_earn : school-based sources only
*   *_earn     : school-based plus non-school sources
* Regular school year
gen reg_sch_earn    = reg_sch_oth_num + reg_sch_bonus_num
gen reg_earn        = reg_sch_oth_num + reg_sch_bonus_num + reg_other_num
* Summer
gen summer_sch_earn = summer_teach_num + summer_nonteach_num
gen summer_earn     = summer_teach_num + summer_nonteach_num + summer_nonschool_num

* Express all dollar amounts in 2019 USD
* Every record is tagged with August 1999 and matched to the price-index file on
* that month. The merge must not leave any teacher record unmatched; index
* months that match no teacher record are discarded.
* NOTE(review): dividing by pce yields 2019 dollars only if the index in pce.dta
* is scaled relative to 2019 -- confirm against the script that builds that file.
gen tm = ym(1999, 8)
merge m:1 tm using "$basepath/data/derived/pce.dta", assert(using match) keep(match) nogenerate

* Deflate every dollar-denominated variable (the *_num components and the *_earn totals)
unab dollar_vars : *_num *_earn
foreach money of local dollar_vars {
	replace `money' = `money'/pce
}


* Estimate regressions
*-------------------------------------------------------------------------------
* For two families of outcomes (work indicators and dollar earnings), regress
* each outcome on female, once with no covariates and once with controls, and
* collect the results in the matrices <family>_gap_raw and <family>_gap_controls.
* Each matrix has columns
*   spec : position of the outcome within its family (1, 2, ...)
*   beta : coefficient on female
*   se   : robust standard error of that coefficient
*   cons : intercept of the specification WITHOUT covariates, stored in both
*          matrices
* Row 1 of every matrix is an all-missing placeholder labelled "empty".

* Covariates for the regression-adjusted specification
local controls "totexper i.age_cat i.wbho masters i.urbanic i.region i.schlevel i.assign"

foreach y in "work" "earnings" {

	* Outcomes in this family, in plotting order
	* NOTE(review): summer_any is not created anywhere in this file; it is
	* assumed to already exist in the teacher data -- TODO confirm.
	if "`y'" == "work"     local outcomes "sch_any reg_sch reg_other summer_any summer_sch summer_nonschool"
	if "`y'" == "earnings" local outcomes "reg_earn reg_sch_earn reg_other_num summer_earn summer_sch_earn summer_nonschool_num"

	* Start each results matrix with a single placeholder row so that rows can
	* be appended below
	foreach g in "raw" "controls" {
		matrix `y'_gap_`g' = J(1, 4, .)
		matrix colnames `y'_gap_`g' = spec beta se cons
	}

	local spec_id = 0
	foreach yvar of local outcomes {
		local ++spec_id

		* The raw specification must run first: its intercept is reused in the
		* row written for the specification with controls
		foreach g in "raw" "controls" {
			local rhs
			if "`g'" == "controls" local rhs `controls'

			quietly regress `yvar' female `rhs' [aweight = tfnlwgt], vce(robust)
			local cons_`g' = _b[_cons]

			* Append this outcome's row
			matrix row_temp = (`spec_id', _b[female], _se[female], `cons_raw')
			matrix `y'_gap_`g' = `y'_gap_`g' \ row_temp
		}
	}

	* Name the rows: the placeholder first, then one row per outcome
	foreach g in "raw" "controls" {
		matrix rownames `y'_gap_`g' = empty `outcomes'
	}
}


* Create figure
*-------------------------------------------------------------------------------

* Print the estimates for the work indicators
matrix list work_gap_raw
matrix list work_gap_controls

* Regression-adjusted female coefficient for any summer work, looked up by the
* row and column names attached to the matrix, and the same coefficient for any
* supplemental work during the regular year
local beta_col   = colnumb(work_gap_controls, "beta")
local summer_row = rownumb(work_gap_controls, "summer_any")
local reg_row    = rownumb(work_gap_controls, "sch_any")

local summer_gap = el(work_gap_controls, `summer_row', `beta_col')
display "Gender gap in having summer earnings: " %5.1f `=100 * `summer_gap'' " percent"
local reg_summer_diff = `summer_gap' - el(work_gap_controls, `reg_row', `beta_col')
display "Summer gap minus school-year gap: " %5.1f `=100 * `reg_summer_diff'' " percent"

* Print the raw and regression-adjusted estimates for earnings
matrix list earnings_gap_raw
matrix list earnings_gap_controls

* Build the plotting dataset from the earnings estimates with controls
* One observation per outcome; the all-missing placeholder row is discarded
clear
svmat earnings_gap_controls, names(col)
keep if !missing(spec)
format cons %12.2f

* Three bar heights per outcome:
*   f0 = intercept of the no-covariate regression (the bar labelled "Men")
*   f1 = f0 plus the adjusted female coefficient (the bar labelled "Women")
*   f2 = f0 - f1, the men-minus-women gap
gen f0 = cons
gen f1 = cons + beta
gen f2 = f0 - f1

* 95 percent confidence interval for the gap. The gap equals minus the female
* coefficient, so the interval is centred on -beta.
local z = 1.96
gen beta_low = -beta - `z' * se
gen beta_upp = -beta + `z' * se

* Stack the three bars; female takes the values 0, 1, 2 for f0, f1, f2
reshape long f, i(spec) j(female)

* Horizontal position of each bar: the bar index (0, 1, 2) shifted by an
* outcome-specific offset. The wider step between the third and fourth offsets
* separates the regular-year outcomes from the summer outcomes.
gen xval = .
local spec_id = 0
foreach offset in 0 3.5 7 12 15.5 19 {
	local ++spec_id
	replace xval = female + `offset' if spec == `spec_id'
}

* Draw the figure: one bar series each for men, women, and the gender gap, plus
* capped spikes showing the confidence interval around each gap bar
* NOTE(review): the col1, col2 and col3 globals are not set in this file;
* presumably they are defined by the project setup -- confirm.
twoway ///
	(bar f xval if female == 0, color($col1) fcolor(*1.20) lcolor(black) lwidth(vthin)) ///
	(bar f xval if female == 1, color($col2) fcolor(*0.80) lcolor(black) lwidth(vthin)) ///
	(bar f xval if female == 2, color($col3) fcolor(*0.40) lcolor(black) lwidth(vthin)) ///
	(rcap beta_upp beta_low xval if female == 2, lcolor(gray) lalign(outside)), ///
	text(4800 4.5 "{bf:During the regular year}") ///
	text(4800 16.3 "{bf:During the summer}") ///
	xtitle("") ///
	xlabel(0.75 "Any" 4.25 "School" 7.75 "Non-school" 12.75 "Any" 16.25 "School" 19.75 "Non-school", noticks labsize(medsmall)) ///
	ytitle("Earnings from extra work (2019 $)") ///
	yscale(range(0 4800)) ///
	ylabel(0(500)4500) ///
	graphregion(margin(b=0)) ///
	plotregion(margin(b=0)) ///
	legend(order(1 "Men" 2 "Women" 3 "Gender gap") rows(1))

* Save the figure
* NOTE(review): nicepdf is not defined in this file; presumably a project
* command that exports the current graph to the given PDF path -- confirm.
nicepdf "$basepath/output/sass.pdf", indirect replace

* Print the regression-adjusted female coefficient for total summer earnings
* (outcome 4). Listing it on the gap row shows it once rather than three times.
list beta if female == 2 & spec == 4

* Close the log file
unlaunch
